clear frames

* Load the prepared pid-wave file; cumulative class information is added to it further down
use "data/ready4wrangling.dta", clear

********************************************************************************
****** CLASS AND EMPLOYMENT STATUS SCHEMAS
********************************************************************************

* GOLDTHORPE EGP
* One value label holds both the five employment classes (codes 1-5) ...
label define egp 1 "Workers" 2 "Routine non-manual" 3 "Lower service" 4 "Higher service" 5 "Petty bourgeosie", modify
* ... and the non-class statuses (codes 55 and above)
label define egp 55 "Part-time work" 60 "Unemployed" 70 "Retired" 80 "Family care" 85 "Maternity leave", modify
label define egp 90 "Education" 95 "Gov. training scheme" 100 "LT sick/disabled" 110 "NA" 120 "Other", modify

* Write the definition to a do-file so it can be re-created after other datasets are loaded
label save egp using "data/class.do", replace

********************************************************************************
****** CREATE FATHER'S AND CONCURRENT CLASS VARIABLES FROM INDIVIDUAL RESPONSE FILES (INDRESP)
********************************************************************************

	* Respondent's concurrent EGP class: the 11 jbgold codes are grouped into 5 classes,
	* any other non-missing code is set to missing
	recode jbgold (1=4) (2=3) (3 4=2) (5 6 7=5) (8 9 10 11=1) (nonm=.), gen(egp_indresp)
		label var egp_indresp "Concurrent EGP class (5 classes + status)"
		label val egp_indresp egp

		* Where no class was assigned, fall back on the status implied by jbstat.
		* Each pair is "jbstat-code egp-code"; the jbstat codes are mutually exclusive,
		* so the order of the pairs does not matter
		foreach pair in "3 60" "4 70" "5 85" "6 80" "7 90" "8 100" "9 95" "97 110" {
			gettoken js code : pair
			replace egp_indresp = `code' if jbstat == `js' & mi(egp_indresp)
		}

	* Father's Goldthorpe class: same grouping; everything else (other codes and
	* missing values alike) ends up in 120 ("Other")
	recode pagold_bh (1=4) (2=3) (3 4=2) (5 6 7=5) (8 9 10 11=1) (else=120), gen(p_egp)
		label var p_egp "Father's EGP class"
		label val p_egp egp

* Keep only the variables needed below, in this order
keeporder pidp pid wave scend_dv feend birthy birthm istrtdatm year p_egp egp_indresp 
	
********************************************************************************
****** CREATE EMPTY CLASS HISTORY PANEL TO FILL IN WITH WORK HISTORY DATA
********************************************************************************

	* Interviews without an interview month cannot be placed on the monthly scale
	keep if !mi(istrtdatm)

	* Birth month and interview month as Stata monthly dates
	gen domy=ym(birthy,birthm)
	gen now=ym(year,istrtdatm)

	* Re-base both so that Jan 1900 is month 0
	local jan1900 = ym(1900,1)
	gen domy_1900 = domy - (`jan1900')
	gen now_1900 = now - (`jan1900')

	* Month (on the 1900 scale) in which the respondent is 14 years = 168 months old
	gen age14m = domy_1900 + 168

	* Latest interview month per respondent (this is not necessarily the last observed employment status!)
	bys pidp (year): egen last=max(now_1900)

	* Side file: one row per pid-wave giving the interview month and the concurrent class
	preserve 
		keep pid wave now_1900 egp_indresp
		order pid wave now_1900 egp_indresp
		rename now_1900 month
		save "data/wavemonth.dta",replace
	restore


	* Use education variables to define initial entry to labor market.
	* The within-person maximum makes each leaving age constant across a person's waves.
	bys pidp (year): egen scmax = max(scend_dv) // Standardize school-leaving age within pidp
	bys pidp (year): egen femax = max(feend) // Standardize further-education leaving age within pidp

	gen finishschool=.
		* School-leaving age is used when it lies in 10-20
		replace finishschool = scmax if inrange(scmax,10,20)
		* A later further-education age in 16-25 takes precedence. Stata treats missing as
		* larger than any number, so femax>scmax alone is false whenever scmax is missing;
		* that case is allowed explicitly so a valid FE age is not discarded just because
		* no school-leaving age was recorded. A missing femax is still rejected by inrange().
		replace finishschool = femax if (femax>scmax | mi(scmax)) & inrange(femax,16,25)
	label var finishschool "Age when left education (cap 25)"
	
	* Build one row per person-month, running from the month of the 14th birthday (t1)
	* to the last observed interview month (t2)
	local idvars pidp pid domy finishschool p_egp

	* One row per person holding the two end points of the window
	collapse (mean) t1=age14m t2=last, by(`idvars')

	* Stack the two end points, then keep only the month value itself
	reshape long t, i(`idvars') j(endpoint)
	drop endpoint
	rename t month

	* Declare the panel and let tsfill insert every month between the two end points
	tsset pid month
	tsfill 

	* Rows created by tsfill only carry pid and month: copy the person constants down
	foreach v of varlist pidp domy finishschool p_egp {
		bys pid (month): carryforward `v', replace 
	}
	format domy %tm

	* month counts from Jan 1900, Stata's %tm dates count from Jan 1960:
	* subtract 60 years (720 months) to obtain a displayable calendar month
	gen currentmy = month - 720
	format currentmy %tm

	* Age in years at each month
	gen age = (currentmy-domy)/12
	format age %3.1f

* Result: one row for every pidp-month from age 14 to the last observed month

********************************************************************************
****** CREATE CLASS VARIABLES FROM WORKLIFE HISTORY
********************************************************************************

* The work-life history is prepared in its own frame so the pid-month panel in the
* default frame stays in memory
frame create new 
frame change new
	* Load the data first: -use, clear- replaces everything held in the frame, value
	* labels included, so the egp label has to be re-created after loading, not before.
	* The path is quoted so that a directory name containing spaces still works.
	use "$BHPSlifehistory/wlh.dta", clear // HAVE TO CHANGE THIS MANUALLY!
	do "data/class.do"

* Goldthorpe class schema
	* Same 11-to-5 grouping as used for jbgold and pagold_bh in this file; code 7 is
	* assigned to class 5 here as well so that the three class variables agree
	* (it previously fell through to 110 "NA" in this recode only)
	recode gold (8 9 10 11=1) (3 4=2) (2 = 3) (1=4) (5 6 7 = 5) (nonm=.) , gen(egp)
	* Spell status overrides the class code; targets follow the egp value label
	* (60 Unemployed, 70 Retired, 80 Family care, 85 Maternity leave, 90 Education,
	* 95 Gov. training scheme, 100 LT sick/disabled)
	*replace egp= 55 if status==2
	replace egp= 60 if status==3
	replace egp= 70 if status==4
	replace egp= 80 if status==6
	replace egp= 85 if status==5
	replace egp= 90 if status==7
	replace egp= 95 if status==9
	replace egp= 100 if status==8
	* Anything still unclassified, and status 10, becomes 110 ("NA")
	replace egp=110 if mi(egp) | status==10
	
	label val egp egp
	label var egp "EGP class (5 classes+status)"
		
* Record last observed complete month for each respondent 
	bys pid (sort): egen complete=max(end)
	label var complete "Employment history complete until this month"
	
	* Only the spell identifiers, spell boundaries, class and completeness marker are needed
	keeporder pid sort start end egp complete

********************************************************************************
****** FILL PANEL WITH WORKLIFE HISTORY
********************************************************************************

* Each spell contributes two rows, one at its start month and one at its end month
	rename (start end) (t1 t2)

* Spells of zero length carry no information
	drop if t2 - t1 == 0

	reshape long t, i(pid sort egp complete) j(endpoint)

* The end of one spell usually coincides with the start of the next.
* Wherever a pid-month occurs more than once, the end row(s) are removed
	duplicates tag pid t, gen(extra)
	drop if extra > 0 & endpoint == 2
	drop extra

* One obs still shares its pid-month after that step: within such a group,
* ordered by spell number, the second row is removed
	bys pid t (sort): gen byte dupseq = cond(_N > 1, _n, 0)
	drop if dupseq == 2
	drop dupseq endpoint

* Declare the result as a pid-month panel
	rename t month
	tsset pid month

* Frame new now holds pid-month rows with class info; match them to the panel in the default frame
	
frame change default

	* Exact pid-month match, then copy class and completeness marker across
	frlink 1:1 pid month, frame(new)
	frget egp complete, from(new)
	
* The work-life history frame is no longer needed
frame drop new 

	* Only spell boundary months were matched: carry the values down to the months in between
	foreach v of varlist egp complete {
		bys pid (month): carryforward `v', replace 
	}

* Blank out class for any month later than the last complete month of the employment history
	replace egp = . if !mi(month) & month > complete

* Add the wave-month rows (wave number and concurrent class at interview)
merge 1:1 pid month using "data/wavemonth.dta", nogenerate

********************************************************************************
****** CREATE CUMULATIVE CLASS VARIABLES
********************************************************************************

* Re-create the egp value label and attach it to the history-based class variable
do "data/class.do"
	label val egp egp

* Five employment classes only: every code from 6 upwards (the statuses) is set to missing
recode egp (6/max=.), gen(egp5)
	label val egp5 egp
	label var egp5 "EGP class (5 classes)"

* Frequency table of class/status for the log
tabulate egp

* Cumulative number of months in each class/status, up to and including the current month.
* Counter k is tied to an explicit egp code instead of to the k-th category that happens
* to be observed in the data, so the variable labels below stay correct even if a status
* never occurs or an additional code (e.g. 55) shows up.
* Code 110 ("NA") deliberately gets no counter.
local egpcodes 1 2 3 4 5 60 70 80 85 90 95 100
local k = 0
foreach code of local egpcodes {
	local ++k
	tempvar inclass
	* 1 in months spent in this class/status, 0 otherwise (also when egp is missing)
	gen byte `inclass' = (egp == `code')
	bys pid (month): gen egp`k'months = sum(`inclass')
	drop `inclass'
}

* Goldthorpe EGP
label variable egp1months "Months as: Worker"
label variable egp2months "Months as: Routine non-manual"
label variable egp3months "Months as: Lower service"
label variable egp4months "Months as: Higher service"
label variable egp5months "Months as: Petty bourgeosie"
label variable egp6months "Months as: Unemployed"
label variable egp7months "Months as: Retired"
label variable egp8months "Months as: Family care"
label variable egp9months "Months as: Maternity leave"
label variable egp10months "Months as: Education"
label variable egp11months "Months as: Government train. scheme"
label variable egp12months "Months as: LT sick/disabled"

********************************************************************************
****** COUNT NUMBER OF MONTHS IN DIFFERENT SPELLS
********************************************************************************

* Total over all month counters. This has to be computed before egp_nummonths exists,
* because that name would also match the egp*months pattern
egen nummonths = rowtotal(egp*months), missing
label variable nummonths "Total number of months since finished school"

* Total over the five employment classes only (counters 1 to 5)
local fiveclass
forvalues k = 1/5 {
	local fiveclass `fiveclass' egp`k'months
}
egen egp_nummonths = rowtotal(`fiveclass'), missing
label variable egp_nummonths "Total number of months in Goldthorpe 5-classes"

* Keep identifiers, class variables and all month counters; put the two totals side by side
keep pidp wave p_egp egp_indresp egp egp5 *months *ummonth* 
order egp_nummonths, after(nummonths)

* Only rows that belong to an interview wave of an identified person are saved
keep if !mi(wave) & !mi(pidp)

save data/wave_classhistory, replace

